package com.webull.information.center.carwler.common.util.jsoup.prase_en;

import java.io.IOException;
import java.text.ParseException;
import java.util.Date;
import java.util.Optional;

import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Connection;
import org.jsoup.Jsoup;

import com.webull.framework.util.UtilDate;
import com.webull.information.center.carwler.common.model.NewsInformation;
import com.webull.information.center.carwler.common.util.jsoup.HtmlBodyPrase;

/**
 * Parser for article pages served from http://news.cmlviz.com/.
 * <p>
 * Reads the headline and the publication date out of an already-fetched
 * article document and copies them onto a {@link NewsInformation}.
 *
 * @author shimingjun
 * @date 2016-08-19 20:30:39
 * @version 1.0
 * @since JDK 1.8
 */
public class Cmlviz_HtmlPrase implements HtmlBodyPrase {
	/** CSS query locating the article headline. */
	private static final String TITLE_SELECTOR = "h2> span.article_title";

	/** CSS query locating the publication date element. */
	private static final String PUBLISHED_SELECTOR = "span[itemprop=datePublished]";

	/** Pattern handed to {@code UtilDate.parse} for the publication date text. */
	private static final String PUBLISHED_DATE_PATTERN = "yyyy-MM-dd";

	/** Source name applied when the caller has not already supplied one. */
	private static final String DEFAULT_SOURCE_NAME = "CML News";

	protected final Logger logger = LogManager.getLogger(getClass());

	/**
	 * Fills {@code info} from a cmlviz article document.
	 * <p>
	 * Sets the title (when the headline element is present and non-blank),
	 * defaults the source name to {@code "CML News"} when it is blank, and
	 * sets the push time plus, when the text parses as {@code yyyy-MM-dd},
	 * the news time. Any exception raised while doing so is logged and not
	 * propagated, so a failure leaves {@code info} partially populated.
	 * <p>
	 * Example page:
	 * http://news.cmlviz.com/2016/07/21/tivo-inc-and-verint-systems-inc-head-to-head-compare.html
	 *
	 * @param doc  parsed article document
	 * @param info target object that receives the extracted values
	 */
	@Override
	public void praseNewsInfo(org.jsoup.nodes.Document doc, NewsInformation info) {
		try {
			// title
			firstHtml(doc, TITLE_SELECTOR).ifPresent(title -> info.setTitle(title));

			// sourceName: keep whatever the caller already set
			if (StringUtils.isBlank(info.getSourceName())) {
				info.setSourceName(DEFAULT_SOURCE_NAME);
			}

			// news time
			firstHtml(doc, PUBLISHED_SELECTOR).ifPresent(published -> applyPublishedDate(published, info));
		} catch (Exception e) {
			// Pass the throwable as the second argument so the stack trace is logged.
			logger.warn("Failed to parse cmlviz article page", e);
		}
	}

	/**
	 * Returns the stripped inner HTML of the first element matching
	 * {@code cssQuery}, or an empty Optional when nothing matches or the
	 * content is blank.
	 */
	private static Optional<String> firstHtml(org.jsoup.nodes.Document doc, String cssQuery) {
		return Optional.ofNullable(doc.select(cssQuery))
				.map(matches -> matches.first())
				.map(element -> StringUtils.stripToNull(element.html()));
	}

	/**
	 * Stores the raw date text as the push time, then tries to parse it into
	 * the news time. A parse failure is logged and leaves the news time
	 * untouched; the push time is still kept.
	 */
	private void applyPublishedDate(String published, NewsInformation info) {
		info.setPushTime(published);
		try {
			Date parsed = UtilDate.parse(published, PUBLISHED_DATE_PATTERN);
			if (parsed != null) {
				info.setNewsTime(parsed);
			}
		} catch (Exception e) {
			// Best effort: report the offending text rather than silently dropping it.
			logger.warn("Could not parse publish date '" + published + "' with pattern "
					+ PUBLISHED_DATE_PATTERN, e);
		}
	}

	/**
	 * Manual smoke test: fetches one sample article through a proxy on
	 * 127.0.0.1:1080, runs the parser over it and prints the result.
	 *
	 * @param args unused
	 * @throws ParseException declared for signature compatibility
	 * @throws IOException    if the page cannot be fetched
	 */
	public static void main(String[] args) throws ParseException, IOException {
		// Alternative sample page:
		// http://news.cmlviz.com/2016/07/21/tivo-inc-and-verint-systems-inc-head-to-head-compare.html
		String url = "http://news.cmlviz.com/2016/07/14/china-jo-jo-drugstores-inc--nasdaqcm-cjjd-fundamental-star-rating-report.html";

		Connection connection = Jsoup.connect(url).userAgent(
				"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36")
				// a fresh random 40-character value is sent on every run
				.header("x-client-data", RandomStringUtils.randomAlphanumeric(40));

		connection.proxy("127.0.0.1", 1080);
		org.jsoup.nodes.Document doc = connection.timeout(10000).get();

		NewsInformation info = new NewsInformation();
		new Cmlviz_HtmlPrase().praseNewsInfo(doc, info);
		System.out.println(info);
	}
}
